L05 Annotation & Positioning

Data Visualization (STAT 302)

Author

Shuo Han

Overview

The goal of this lab is to explore methods for annotating and positioning elements in ggplot2 plots. This lab also uses the scale_* functions to a greater degree, which are part of our next reading. In fact, students may find it useful to read chapter 11, Colour scales and legends.

Datasets

We’ll be using the blue_jays.rda, titanic.rda, Aus_athletes.rda, and tech_stocks.rda datasets.

Code
# Load package(s)
library(tidyverse)
library(ggrepel)
library(cowplot)
library(patchwork)
# Load data
# Each load() call restores the object(s) saved in the given .rda file into
# the current environment; the paths are relative to the working directory.
# NOTE(review): the exercises below assume these files provide objects named
# blue_jays, titanic, Aus_athletes and tech_stocks -- confirm.
load("data/blue_jays.rda")
load("data/titanic.rda")
load("data/Aus_athletes.rda")
load("data/tech_stocks.rda")

Exercises

Complete the following exercises.

Exercise 1

Using the blue_jays.rda dataset, recreate the following graphic as precisely as possible.

Hints:

  • Transparency is 0.8
  • Point size 2
  • Create a label_info dataset that is a subset of original data, just with the 2 birds to be labeled
  • Shift label text horizontally by 0.5
  • See ggplot2 textbook 8.3 building custom annotations
  • Annotation size is 4
  • Classic theme
Code
# Exercise 1: head length vs. body mass for the blue jays, with two labeled
# birds and a caption pinned to the top-left corner of the data range.

# Data ranges; used to anchor the caption at (min Mass, max Head)
x_rng <- range(blue_jays$Mass)
y_rng <- range(blue_jays$Head)

# IDs of the two birds to label, and the caption text
bird_list <- c("1142-05914", "702-90567")
caption <- "Head length versus body mass for 123 blue jays"

# Label data: subset of the original data with only the birds to be labeled
label_info <- blue_jays %>%
  filter(BirdID %in% bird_list)

# Build plot
ggplot(
  data = blue_jays,
  mapping = aes(x = Mass, y = Head, color = KnownSex)
) +
  geom_point(show.legend = FALSE, alpha = 0.8, size = 2) +
  geom_text(
    data = label_info,
    mapping = aes(label = KnownSex),
    nudge_x = 0.5,  # shift label text horizontally by 0.5
    show.legend = FALSE
  ) +
  annotate(
    geom = "text",
    x = x_rng[1],
    y = y_rng[2],
    label = caption,
    size = 4,   # annotation size given in the exercise hints
    hjust = 0,  # left edge of the text sits at x
    vjust = 1   # top edge of the text sits at y (text hangs from the corner)
  ) +
  labs(x = "Body mass (g)", y = "Head length (mm)") +
  theme_classic()

Exercise 2

Using the tech_stocks dataset, recreate the following graphics as precisely as possible.

Plot 1

Code
# Caption text for the Exercise 2 plots
caption <- "Stock price over time for four major tech companies"

# Wrapped copy of the caption: strwrap() breaks the text at a target width of
# 40 characters and the pieces are rejoined with "\n" (the line-break code)
caption_print <- paste(strwrap(caption, width = 40), collapse = "\n")

# Drop any grouping so the steps below operate on the whole table
tech_stocks <- ungroup(tech_stocks)

# Label data: sort newest-first, then keep the first row seen for each
# company (its most recent observation) with all columns retained
Labels <- tech_stocks %>%
  arrange(desc(date)) %>%
  distinct(company, .keep_all = TRUE)

# Ranges of the x and y variables, used to place the caption in a corner
xrng <- range(tech_stocks$date)
yrng <- range(tech_stocks$price_indexed)

Hints:

  • Create a label_info dataset that is a subset of original data, just containing the last day’s information for each of the 4 stocks
  • serif font
  • Annotation size is 4
Code
# Plot 1: indexed stock price over time, one line per company, with the
# caption in the top-left corner and each company named at its latest price.
ggplot(tech_stocks, aes(date, price_indexed)) +
  # Map color inside geom_line() only, so the text layers are not colored
  geom_line(aes(color = company)) +
  # Caption anchored at (earliest date, highest indexed price)
  annotate(
    "text",
    x = xrng[1],
    y = yrng[2],
    label = caption,
    hjust = 0,         # left-justify
    vjust = 1,         # top-justify (text hangs below the anchor point)
    family = "serif",  # serif font
    size = 4
  ) +
  # Plain text labels at the most recent price of each company. The Plot 1
  # hints list no ggrepel settings, so geom_text() is used here.
  # NOTE(review): confirm against the target graphic.
  geom_text(data = Labels, aes(label = company)) +
  # No x-axis title
  labs(x = NULL, y = "Stock price, indexed") +
  # Remove all legends
  theme(legend.position = "none")

Plot 2

Hints:

  • Package ggrepel
    • box.padding is 0.6
    • Minimum segment length is 0
    • Horizontal justification is to the right
    • seed of 9876
  • Annotation size is 4
  • serif font
Code
# Plot 2: indexed stock price over time, one line per company, with the
# caption in the top-left corner and repelled company labels (ggrepel).
ggplot(tech_stocks, aes(date, price_indexed)) +
  # Map color inside geom_line() only, so the text layers are not colored
  geom_line(aes(color = company)) +
  # Caption anchored at (earliest date, highest indexed price)
  annotate(
    "text",
    x = xrng[1],
    y = yrng[2],
    label = caption,
    hjust = 0,         # left-justify
    vjust = 1,         # top-justify (text hangs below the anchor point)
    family = "serif",  # serif font
    size = 4
  ) +
  # Company labels at the most recent stock price
  geom_text_repel(
    data = Labels,
    aes(label = company),
    box.padding = 0.6,
    min.segment.length = 0,
    hjust = 1,    # right-justify
    seed = 9876   # fixed seed for the label placement
  ) +
  # No x-axis title
  labs(x = NULL, y = "Stock price, indexed") +
  # Remove all legends
  theme(legend.position = "none")

Exercise 3

Using the titanic.rda dataset, recreate the following graphic as precisely as possible.

Code
# Exercise 3: passenger counts by sex, faceted by outcome (rows) and
# passenger class (columns).
titanic %>%
  # New variable whose categories are labeled "died" / "survived"
  mutate(
    survival_status = factor(survived, labels = c("died", "survived"))
  ) %>%
  ggplot(aes(x = sex, fill = sex)) +
  geom_bar() +
  facet_grid(rows = vars(survival_status), cols = vars(class)) +
  # Hex colors already carry their own transparency (the trailing "D0")
  scale_fill_manual(values = c("#D55E00D0", "#0072B2D0")) +
  theme_minimal() +
  theme(legend.position = "none")

Hints:

  • Create a new variable that uses died and survived as levels/categories
  • Hex colors: #D55E00D0, #0072B2D0 (no alpha is being used)

Exercise 4

Use the athletes_dat dataset — extracted from Aus_athletes.rda — to recreate the following graphic as precisely as possible. Create the graphic twice: once using patchwork and once using cowplot.

Code
# Sports that have athletes of BOTH sexes in the data
both_sports <- Aus_athletes %>%
  group_by(sport) %>%
  # number of distinct sexes recorded for each sport
  summarise(n_sexes = n_distinct(sex)) %>%
  # keep only the sports recorded for two sexes
  filter(n_sexes == 2) %>%
  # extract the sport names
  pull(sport)

# Process data: restrict to those sports, then merge the two track events
athletes_dat <- Aus_athletes %>%
  filter(sport %in% both_sports) %>%
  mutate(
    sport = case_when(
      # "track (400m)" and "track (sprint)" both become "track"
      sport %in% c("track (400m)", "track (sprint)") ~ "track",
      # every other sport keeps its name
      TRUE ~ sport
    )
  )

Hints:

  • Build each plot separately
  • Bar plot: lower limit 0, upper limit 95
  • Bar plot: shift bar labels by 5 units and top justify
  • Bar plot: label size is 5
  • Bar plot: #D55E00D0 & #0072B2D0 — no alpha
  • Scatterplot: #D55E00D0 & #0072B2D0 — no alpha
  • Scatterplot: filled circle with “white” outline; size is 3
  • Scatterplot: rcc is red blood cell count; wcc is white blood cell count
  • Boxplot: outline #D55E00 and #0072B2; shading #D55E0040 and #0072B240
  • Boxplot: should be made narrower; 0.5
  • Boxplot: Legend is in top-right corner of bottom plot
  • Boxplot: Space out labels c("female ", "male")
  • Boxplot: Legend shading matches hex values for top two plots

Using patchwork

Code
# Sports that have athletes of BOTH sexes in the data
both_sports <- Aus_athletes %>%
  group_by(sport) %>%
  # number of distinct sexes recorded for each sport
  summarise(n_sexes = n_distinct(sex)) %>%
  # keep only the sports recorded for two sexes
  filter(n_sexes == 2) %>%
  # extract the sport names
  pull(sport)

# Process data: restrict to those sports, then merge the two track events
athletes_dat <- Aus_athletes %>%
  filter(sport %in% both_sports) %>%
  mutate(
    sport = case_when(
      # "track (400m)" and "track (sprint)" both become "track"
      sport %in% c("track (400m)", "track (sprint)") ~ "track",
      # every other sport keeps its name
      TRUE ~ sport
    )
  )
# Bar plot: number of athletes by sex, with each count printed inside its bar
plotA <- ggplot(athletes_dat, aes(sex, fill = sex)) +
  geom_bar(show.legend = FALSE) +
  # Hex colors already carry their own transparency (the trailing "D0")
  scale_fill_manual(values = c("#D55E00D0", "#0072B2D0")) +
  scale_x_discrete(name = NULL, labels = c("female", "male")) +
  scale_y_continuous(
    name = "number",
    breaks = seq(0, 100, 25),
    limits = c(0, 95)  # lower limit 0, upper limit 95
  ) +
  theme_minimal() +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),  # replaces the deprecated ..count..
    nudge_y = -5,  # shift bar labels by 5 units (down, into the bar)
    vjust = 1,     # top-justify the label at the shifted position
    size = 5       # label size from the exercise hints
  )
# Legend labels for the boxplot; the trailing space after "female" spaces the
# two legend entries apart, as given in the exercise hints
sex_labs <- c("female ", "male")

# Scatterplot: white blood cell count (wcc) against red blood cell count
# (rcc), one filled circle per athlete
plotB <- ggplot(athletes_dat, aes(rcc, wcc)) +
  geom_point(
    aes(fill = sex),
    shape = 21,       # filled circle with a separate outline color
    color = "white",  # outline
    size = 3,
    show.legend = FALSE
  ) +
  # Hex colors already carry their own transparency (the trailing "D0")
  scale_fill_manual(values = c("#D55E00D0", "#0072B2D0")) +
  # Axis titles use matching capitalization
  labs(x = "RBC count", y = "WBC count") +
  theme_minimal()

# Boxplot: percent body fat (pcBfat) by sport, split by sex
plotC <- ggplot(athletes_dat, aes(sport, pcBfat)) +
  # Narrower boxes (width 0.5); sex drives both outline and shading
  geom_boxplot(aes(color = sex, fill = sex), width = 0.5) +
  # Box shading
  scale_fill_manual(
    name = NULL,
    labels = sex_labs,
    values = c("#D55E0040", "#0072B240")
  ) +
  # Box outlines: the plain hex values given in the exercise hints
  scale_color_manual(
    name = NULL,
    labels = sex_labs,
    values = c("#D55E00", "#0072B2")
  ) +
  guides(
    fill = guide_legend(
      ncol = 2,
      # Legend keys use the same fill colors as the bar plot and scatterplot,
      # with no outline
      override.aes = list(
        fill = c("#D55E00D0", "#0072B2D0"),
        color = "transparent"
      )
    )
  ) +
  labs(x = NULL, y = "% body fat") +
  theme_minimal() +
  theme(
    # Legend in the top-right corner of the plot
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    # Set the top legend margin to zero
    legend.margin = margin(t = 0)
  )

# patchwork layout: plotA beside plotB on top, plotC underneath
(plotA | plotB) / plotC


Using cowplot

Use cowplot::plot_grid() to combine them.

Code
# Bar plot: number of athletes by sex, with each count printed inside its bar
plotA <- ggplot(athletes_dat, aes(sex, fill = sex)) +
  geom_bar(show.legend = FALSE) +
  # Hex colors already carry their own transparency (the trailing "D0")
  scale_fill_manual(values = c("#D55E00D0", "#0072B2D0")) +
  scale_x_discrete(name = NULL, labels = c("female", "male")) +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),  # replaces the deprecated ..count..
    nudge_y = -5,  # shift bar labels by 5 units (down, into the bar)
    vjust = 1,     # top-justify the label at the shifted position
    size = 5       # label size from the exercise hints
  ) +
  scale_y_continuous(
    name = "number",
    breaks = seq(0, 100, 25),
    limits = c(0, 95)  # lower limit 0, upper limit 95
  ) +
  theme_minimal()
# Legend labels for the boxplot; the trailing space after "female" spaces the
# two legend entries apart, as given in the exercise hints
sex_labs <- c("female ", "male")

# Scatterplot: white blood cell count (wcc) against red blood cell count
# (rcc), one filled circle per athlete
plotB <- ggplot(athletes_dat, aes(rcc, wcc)) +
  geom_point(
    aes(fill = sex),
    shape = 21,       # filled circle with a separate outline color
    color = "white",  # outline
    size = 3,
    show.legend = FALSE
  ) +
  # Hex colors already carry their own transparency (the trailing "D0")
  scale_fill_manual(values = c("#D55E00D0", "#0072B2D0")) +
  # Axis titles use matching capitalization
  labs(x = "RBC count", y = "WBC count") +
  theme_minimal()

# Boxplot: percent body fat (pcBfat) by sport, split by sex
plotC <- ggplot(athletes_dat, aes(sport, pcBfat)) +
  # Narrower boxes (width 0.5); sex drives both outline and shading
  geom_boxplot(aes(color = sex, fill = sex), width = 0.5) +
  # Box shading
  scale_fill_manual(
    name = NULL,
    labels = sex_labs,
    values = c("#D55E0040", "#0072B240")
  ) +
  # Box outlines: the plain hex values given in the exercise hints
  scale_color_manual(
    name = NULL,
    labels = sex_labs,
    values = c("#D55E00", "#0072B2")
  ) +
  guides(
    fill = guide_legend(
      ncol = 2,
      # Legend keys use the same fill colors as the bar plot and scatterplot,
      # with no outline
      override.aes = list(
        fill = c("#D55E00D0", "#0072B2D0"),
        color = "transparent"
      )
    )
  ) +
  labs(x = NULL, y = "% body fat") +
  theme_minimal() +
  theme(
    # Legend in the top-right corner of the plot
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    # Set the top legend margin to zero
    legend.margin = margin(t = 0)
  )


# cowplot layout: an inner grid holds plotA and plotB side by side; the outer
# grid stacks that row on top of plotC
cowplot::plot_grid(
  cowplot::plot_grid(plotA, plotB, ncol = 2),
  plotC,
  ncol = 1
)

Exercise 5

Create the following graphic using patchwork.

Hints:

  • Use plots created in Exercise 4
  • inset theme is classic
    • Useful values: 0, 0.45, 0.75, 1
  • plot annotation "A"
Code
# Exercise 5: scatterplot with the bar plot inset in its bottom-right corner
plotB +
  # Inset spans 0.75-1 horizontally and 0-0.45 vertically
  inset_element(plotA, left = 0.75, bottom = 0, right = 1, top = 0.45) +
  # Classic theme, added right after the inset (the hints ask for a classic
  # inset theme)
  theme_classic() +
  # Tag the plots with capital letters, starting at "A"
  plot_annotation(tag_levels = "A")